#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Purpose: Create matched dataset for subsequent analyses

# Load packages
library(MatchIt)
library(tidyverse)
library(tidylog)
library(CBPS)
library(modelsummary)
library(cobalt)
library(kableExtra)
library(broom)
library(scales)
library(viridis)
library(gridExtra)
library(here)
library(maps)

# Function to format latex table
source(here("code", "fun", "fix_txt.R"))
#Function for rounding
source(here("code", "fun", "digitsByRows.R"))

# Set seed for replication
set.seed(10)

# Load data
g <- readRDS("data/output/pres_out.rds")

# Define model for propensity scores
f.match <- y ~ white + hispanic + foreign + college + income99pclog + poverty + rural_nonfarm + poplog + under40 + work_female
# Table set up
gof_map[gof_map$raw=="nobs",]$clean <- "$N$"
gof_map[gof_map$raw=="adj.r.squared",]$clean <- "Adjusted $R^2$"
# Create data frame for matching
match.df <- subset(g, select = c(
  fips, state, treat, white, hispanic, foreign, college, income99pclog, poverty, rural_nonfarm, poplog, under40, work_female
  ),
  #Subset to observations that have coal employment or are adjacent
  !is.na(treat)
  )
match.df <- unique(match.df)
dim(match.df)
# Estimate propensity scores
fit <- CBPS(update(f.match, treat ~ .), data = match.df, ATT = TRUE)
summary(fit)
# Nearest neighbor matching
m.out <- matchit(treat ~ 1, distance = fitted(fit), method = "nearest", data = match.df, replace = TRUE)
summary(m.out)
# Prepare data for analysis
nn.df <- match.data(m.out)
dim(nn.df)
g.nn <- subset(g, fips %in% nn.df$fips)
# Save output for analyses in other scripts
saveRDS(g.nn, here("data", "output", "matched_df.rds"))
## Table D1: Covariate Balance----
bal.tab <- rbind(balance(fit)$original, balance(fit)$balanced)
match.varnames <- c(
  "White",
  "Hispanic",
  "Foreign-born",
  "College",
  "Income per capita (log)",
  "Poverty",
  "Rural",
  "Population (log)",
  "Under 40 years",
  "Female workforce"
)
rownames(bal.tab) <- rep(match.varnames, 2)
colnames(bal.tab) <- c("Control", "Treated", "Control", "Treated")
kbl(bal.tab, booktabs = TRUE, digits = 2, escape = FALSE, linesep = "", format="latex") %>%
  add_header_above(c(" "=1,"Mean"=2,"Standardized Mean"=2)) %>%
  group_rows("Original:", 1, 10) %>%
  group_rows("Balanced:", 11, 20) %>%
  cat(., file = here("output", "tables", "si_tab_D1_balance.txt"))
## Table D2: Covariate Balance After Nearest Neighbor Matching ----
subset(nn.df, select = c(treat, white, hispanic, foreign, college, income99pclog, poverty, rural_nonfarm, poplog, under40, work_female)) %>%
  mutate(treat=ifelse(treat==1,"Treatment","Control")) %>%
  dplyr::rename(
    White = white,
    Hispanic = hispanic,
    `Foreign-born` = foreign,
    College = college,
    `Income per capita (log)` = income99pclog,
    Poverty = poverty,
    Rural = rural_nonfarm,
    `Population (log)` = poplog,
    `Under 40 years` = under40,
    `Female workforce` = work_female
  ) %>%
  modelsummary::datasummary_balance(
    formula = White + Hispanic + `Foreign-born` + College + `Income per capita (log)` + Poverty + Rural + `Population (log)` + 
      `Under 40 years` + `Female workforce` ~ treat,
    data = .,
    dinm_statistic = "p.value",
    digits = 2,
    fmt = 2,
    output = "data.frame"
  ) %>%
  kableExtra::kbl(booktabs = TRUE, col.names = c("", "Mean", "SD", "Mean", "SD", "Diff.", "$p$"), digits = 3, format = "latex",escape=FALSE,
                  linesep="") %>%
  kableExtra::add_header_above(c(" "=1,"Control\n(N=97)"=2,"Treatment\n(N=116)"=2," "=2),escape=TRUE) %>%
  cat(., file = here("output", "tables", "si_tab_D2_matching.txt"))
## Figure D2: Pre-Trends After Matching ----
match.pretrends <- g.nn %>%
  mutate(treat = ifelse(treat==1,"Treatment","Control"),
         treat = factor(treat,ordered=TRUE,levels=c("Treatment","Control"))) %>%
  group_by(year, treat) %>%
  summarise(vote = mean(RepVotesMajorPercent, na.rm = TRUE)) %>%
  ggplot(aes(x=year,y=vote,color=treat)) +
  geom_vline(xintercept=2008,color="red") +
  geom_line(aes(lty=treat, color=treat), size = 1.25)+
  geom_point(aes(shape=treat,color=treat), size = 3) +
  labs(color="",shape="",lty="",x = "Election", y = "Two-Party Republican Vote Share") +
  scale_color_grey() +
  theme_bw(base_size = 14) +
  scale_x_continuous(breaks = seq(1968, 2020, 4)) +
  scale_y_continuous(limits = c(30, 80), labels = percent_format(scale=1)) +
  theme(
    panel.grid = element_blank(),
    legend.position = c(.07, .13)
  )
ggsave(
  match.pretrends,
  file = here("output", "figures", "si_fig_D2_match_pretrends.png"),
  width = 6.5,
  height = 3, 
  dpi = 300,
  scale = 1.5
)
## Figure D1: Map of Matched Counties----
counties <- maps::county.fips
mapcounties <- ggplot2::map_data("county")
usstates <- map_data("state")
counties <- counties %>% separate(polyname, into = c("state", "county"), sep = ",")
names(counties)[c(2,3)] <- c("region", "subregion")
`%notin%` <- Negate(`%in%`)
g$unmatched <- ifelse(g$fips %notin% g.nn$fips & !is.na(g$adjgroup), "Unmatched", NA)
g$unmatched <- ifelse(is.na(g$unmatched), as.character(g$adjgroup), g$unmatched)
g$unmatched <- ifelse(g$unmatched %in% c("Distant Neighbor", "Near Neighbor"), "Matched", as.character(g$unmatched))
g_map <- merge(g, counties, by = "fips")
g_map <- subset(g_map, select = c(region,subregion,unmatched)) %>% unique()
g_map_out <- left_join(mapcounties, g_map, by = c("subregion", "region"), multiple = "all")
p.match <- g_map_out %>%
  ggplot() +
  geom_polygon(aes(x = long, y = lat, group = group, fill = unmatched), color = "black", size = 0.05) +
  geom_polygon(data=usstates,aes(x=long,y=lat,group=group),color="black",size=.2,fill="transparent") +
  scale_fill_viridis(option="D",discrete=TRUE,na.translate=FALSE) +
  coord_map(projection = "lambert", lat = 52, lon = 10) +
  theme_void() +
  labs(fill="") +
  scale_x_continuous(expand=c(0,0)) +
  scale_y_continuous(expand=c(0,0)) +
  theme(legend.box.margin = margin(l=-30))
p.match
ggsave(
  filename = here("output", "figures", "si_fig_D1_match_map.png"),
  p.match,
  dpi = 300,
  width = 6.5,
  height = 3.5,
  scale = 1.5
)
